home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Amiga Plus Extra 1997 #1
/
Amiga Plus Extra 1997 #1.iso
/
programme
/
tools
/
leoutils
/
words.c
< prev
next >
Wrap
C/C++ Source or Header
|
1996-11-25
|
7KB
|
292 lines
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "LeoLib.h"
#if defined _AMIGA || defined AMIGA
#if defined __SASC
#include <proto/dos.h>
#else
#include <clib/dos_protos.h>
#endif
#include <dos/dosasl.h>
#endif
#if defined _AMIGA || defined AMIGA
/* Version tag embedded in the executable (Amiga builds only).
   NOTE(review): the "$VER:" prefix is presumably what the AmigaOS `version`
   command searches for -- confirm. The date/copyright suffix is appended
   only when compiling with SAS/C. Nothing in this file reads the variable. */
static char *Version="$VER:Words 2.1"
#if defined __SASC
" " "(21.11.94)" /*__AMIGADATE__*/ " ©1994 Leopold-Soft"
#endif
;
#endif
/* Shorthand for raw 8-bit character data. */
typedef unsigned char uchar;
/* Home-grown boolean: an int that holds TRUE or FALSE. */
typedef int bool;
#define TRUE 1
#define FALSE 0
/* Forward declarations of helpers defined further down in this file.
   Each returns TRUE on success and FALSE on failure. */
/* Read one input file (or stdin when passed NULL) and count its words. */
static bool AddFile(char *InFile);
/* Write the frequency table to the named file (or stdout when passed NULL). */
static bool DisplayResults(char *OutFile);
/* Add n occurrences of Word to the word tree. */
static bool AddWord(uchar *Word, int n);
/* Character classification table, indexed by 8-bit character code
   (the upper half follows the ISO 8859-1 / Latin-1 layout):
     0 = Non-word character
     1 = May be _inside_ a word (' - _ ` and the soft hyphen 0xAD)
     2 = Actual word character, every word
         must contain at least one of these
   In the 0xC0-0xFF range everything is a letter except the
   multiplication sign (0xD7) and the division sign (0xF7).
   The element type is spelled out as unsigned char and the size as 256 so
   the compiler rejects a table with too many entries; the data is
   read-only, hence const.
*/
static const unsigned char char_type[256] =
    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  /* 0 */
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  /* 1 */
     0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,  /* 2 */
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  /* 3 */
     0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  /* 4 */
     2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,1,  /* 5 */
     1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  /* 6 */
     2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,  /* 7 */
  /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  /* 8 */
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  /* 9 */
     0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,  /* a */
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  /* b */
     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  /* c */
     2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,  /* d: 0xD7 is the multiplication sign */
     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  /* e: 0xE7 is c-cedilla, a letter */
     2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2}; /* f: 0xF7 is the division sign */
/* 16384-byte buffer allocated in main() and handed to setvbuf() for the
   input files and the output file. */
static void *IOBuff;
/* argv[0]; used as the prefix of every diagnostic message. */
static char *ProgName;
/* Switched on by -V and off by -v; enables extra progress messages. */
static bool Verbose = FALSE;
/*
 * Program entry point.
 *
 * Arguments are processed left to right:
 *   -o FILE   write the result table to FILE instead of stdout
 *   -V / -v   switch verbose messages on / off
 *   -h or ?   print the usage text
 *   other     an input file (on the Amiga: an AmigaDOS pattern)
 * When no input file (and no help request) was given, stdin is read.
 *
 * Returns 0 on success and 30 on any failure.
 */
int main(int argc, char **argv) {
    int CurrArg = 0;
    char *OutFile = NULL;
    bool FileRead = FALSE;   /* TRUE once a file was counted or help was shown */
#if defined _AMIGA || defined AMIGA
    struct AnchorPath *AP;

    /* The extra 1024 bytes behind the structure hold the full path that
       the pattern matcher writes to ap_Buf. */
    if (!(AP = calloc(1, sizeof(struct AnchorPath) + 1024))) return 30;
    AP->ap_Strlen = 1024;
#endif
    ProgName = argv[0];
    if (!(IOBuff = malloc(16384))) {
        fprintf(stderr, "%s: Couldn't allocate 16KB!\n", ProgName);
        return 30;
    }
    while (++CurrArg < argc) {
        if (!strcmp(argv[CurrArg],"-o")) {
            if (argc > CurrArg+1) {
                OutFile = argv[++CurrArg];
                if (Verbose) printf("Outfile set to %s...\n", OutFile);
            } else {
                fprintf(stderr,"%s: -o option requires a parameter!\n", ProgName);
            }
        } else if (!strcmp(argv[CurrArg],"-V")) {
            if (!Verbose) fprintf(stderr, "Verbose: ON\n");
            Verbose = TRUE;
        } else if (!strcmp(argv[CurrArg],"-v")) {
            if (Verbose) fprintf(stderr, "Verbose: OFF\n");
            Verbose = FALSE;
        } else if (!strcmp(argv[CurrArg],"-h") || !strcmp(argv[CurrArg],"?")) {
            printf("\nWords 2.1 by Henrik Herranen " __DATE__ "\n\n"
                   "Usage: %s [?|-h] | [-v] | [-V] | [InFile1 [InFile2 [...]]] | [-o OutFile]\n\n"
                   , ProgName);
            /* Marking a file as read keeps the program from waiting on stdin
               after the help text. */
            FileRead = TRUE;
        } else {
#if defined _AMIGA || defined AMIGA
            /* Let's do Amiga pattern matching */
            if (MatchFirst(argv[CurrArg], AP)) {
                fprintf(stderr, "%s: Couldn't open %s for reading!\n", ProgName, argv[CurrArg]);
                fprintf(stderr, "%s: Failed!\n", ProgName);
                /* The anchor must be released even when the very first
                   match fails. */
                MatchEnd(AP);
                return 30;
            } else {
                do {
                    /* A negative fib_DirEntryType denotes a plain file. */
                    if (AP->ap_Info.fib_DirEntryType < 0) {
                        fprintf(stderr,"%s: Sorting %s\n", ProgName, AP->ap_Buf);
                        if (!AddFile(AP->ap_Buf)) {
                            fprintf(stderr, "%s: Failed for %s!\n", ProgName, AP->ap_Buf);
                            MatchEnd(AP);
                            return 30;
                        }
                    } else {
                        fprintf(stderr,"%s: Skipping directory %s\n", ProgName, AP->ap_Buf);
                    }
                } while (!(MatchNext(AP)));
                MatchEnd(AP);
            }
#else
            fprintf(stderr,"%s: Sorting %s\n", ProgName, argv[CurrArg]);
            if (!AddFile(argv[CurrArg])) {
                fprintf(stderr, "%s: Failed for %s!\n", ProgName, argv[CurrArg]);
                return 30;
            }
#endif
            FileRead = TRUE;
        }
    }
    /* Fall back to stdin when nothing was read, then print the table. */
    if ((!FileRead && !AddFile(NULL)) || !DisplayResults(OutFile)) {
        fprintf(stderr, "%s: Failed!\n", ProgName);
        return 30;
    }
    return 0;
}
/* Word currently being assembled by AddFile(): at most 80 characters plus
   the terminating NUL, and its current length. */
static uchar Word[81];
static int WordLen;

/*
 * Read one input stream and feed every word found in it to AddWord().
 *
 * Name  path of the file to read, or NULL to read stdin.
 *
 * Characters are classified with char_type[]: class 2 characters always
 * extend the current word, class 1 characters only extend a word that has
 * already started, and class 0 characters end it. Characters are stored
 * through ToUpper() (from LeoLib.h; presumably an upper-casing helper).
 * Words longer than 80 characters are truncated; trailing class 1
 * characters are stripped before the word is counted.
 *
 * Returns TRUE on success, FALSE when the file cannot be opened or when
 * AddWord() fails. The file is closed on both paths.
 */
static bool AddFile(char *Name) {
    FILE *InFile;
    bool Ok = TRUE;
    int c;

    WordLen = 0;
    if (Name) {
#if defined __SASC
        /* NOTE(review): "ra" is kept for SAS/C only, where the original
           code used it (presumably a vendor extension selecting text
           mode). It is not a mode string defined by ISO C. */
        InFile = fopen(Name, "ra");
#else
        InFile = fopen(Name, "r");
#endif
        if (!InFile) {
            fprintf(stderr, "%s: Couldn't open %s for reading!\n", ProgName, Name);
            return FALSE;
        }
        if (IOBuff) setvbuf(InFile, IOBuff, _IOFBF, (size_t) 16384);
    } else {
        InFile = stdin;
    }
    /* Name is NULL for stdin; never hand NULL to %s. */
    if (Verbose) fprintf(stderr, "Reading & sorting file %s...\n", Name ? Name : "(stdin)");
    do {
        c = fgetc(InFile);
        /* End of input is classified like a blank so that a word running
           up to the end of the file is still flushed. */
        switch (char_type[c == EOF ? ' ' : c]) {
        case 0: /* Non-word character */
            if (WordLen > 0) {
                /* Strip trailing "inside only" characters such as ' or -.
                   The lower bound keeps the index valid even if ToUpper()
                   ever maps a letter onto a class 1 character. */
                while (WordLen > 0 && char_type[Word[WordLen-1]] == 1) WordLen--;
                Word[WordLen] = '\000';
                if (WordLen > 0 && !AddWord(Word, 1)) {
                    fprintf(stderr, "%s: Out of memory!\n", ProgName);
                    Ok = FALSE;
                }
                WordLen = 0;
            }
            break;
        case 1: /* May be _inside_ a word */
            if (WordLen > 0 && WordLen < 80) {
                Word[WordLen++] = ToUpper(c);
            }
            break;
        case 2: /* Actual word character */
            if (WordLen < 80) {
                Word[WordLen++] = ToUpper(c);
            }
            break;
        }
    } while (Ok && c != EOF);
    /* stdin is not ours to close. */
    if (Name) fclose(InFile);
    return Ok;
}
/* Node of the binary search tree that holds one entry per distinct word. */
struct WordNode {
struct WordNode *Next; /* Subtree of words that strcmp() orders after this one */
struct WordNode *Prev; /* Subtree of words that strcmp() orders before this one */
uchar *Word; /* NUL-terminated word text; NewWord() stores it right behind the node */
long Amount; /* Number of occurrences counted so far */
};
/* Root of the word tree; NULL until the first word has been added. */
static struct WordNode *FirstNode = NULL;
/* Words counts every AddWord() call, Unique counts every NewWord() call. */
long Words, Unique;
/*
 * Create a detached tree node for Word with an initial count of n.
 *
 * The node and a private copy of the word text are obtained with a single
 * malloc(); the text lives immediately behind the structure. Unique is
 * incremented before the allocation is attempted.
 *
 * Returns the new node, or NULL when the allocation fails.
 */
static struct WordNode *NewWord(uchar *Word, int n) {
    size_t TextBytes = strlen((char *) Word) * sizeof(uchar) + 1; /* text plus NUL */
    struct WordNode *Node;

    Unique++;
    Node = malloc(sizeof(struct WordNode) + TextBytes);
    if (Node == NULL) {
        return NULL;
    }
    Node->Word = (uchar *) (Node + 1); /* Right after the structure */
    memcpy(Node->Word, Word, TextBytes);
    Node->Amount = n;
    Node->Next = NULL;
    Node->Prev = NULL;
    return Node;
}
/*
 * Add n occurrences of Word to the subtree rooted at Node (must not be
 * NULL).
 *
 * If the word is already present its count is increased, otherwise a new
 * leaf is created with NewWord() and linked in at the position strcmp()
 * dictates.
 *
 * The name is historical: the descent is done in a loop rather than by
 * recursion. The tree is not balanced, so alphabetically sorted input
 * makes it as deep as the number of distinct words, and one stack frame
 * per level can exhaust a small stack.
 *
 * Returns TRUE on success, FALSE when NewWord() fails.
 */
static bool AddWordRec(uchar *Word, int n, struct WordNode *Node) {
    for (;;) {
        int CmpRes = strcmp((char *) Word, (char *) (Node->Word));
        struct WordNode **Child;

        if (CmpRes == 0) {
            Node->Amount += n;
            return TRUE;
        }
        /* Smaller words go to the Prev side, larger ones to the Next side. */
        Child = (CmpRes < 0) ? &Node->Prev : &Node->Next;
        if (!*Child) return ((*Child = NewWord(Word, n)) != NULL);
        Node = *Child;
    }
}
/*
 * Count n occurrences of Word.
 *
 * Increments the global Words counter, then either starts the tree with
 * this word or delegates the insertion to AddWordRec().
 *
 * Returns TRUE on success, FALSE when a node could not be allocated.
 */
static bool AddWord(uchar *Word, int n) {
    Words++;
    if (FirstNode != NULL) {
        return AddWordRec(Word, n, FirstNode);
    }
    /* Empty tree: this word becomes the root. */
    FirstNode = NewWord(Word, n);
    return FirstNode != NULL;
}
/* State shared between DisplayResults() and DisplayResultsRec():
     Largest     the count being printed in the current pass
     NewLargest  the highest count below Largest seen in the current pass
     Amount      how many words were printed in the current pass
     OutFile     the stream the table is written to */
static long Largest, NewLargest, Amount;
static FILE *OutFile;

/*
 * Return floor(Mul * Num / Den) without ever forming the product
 * Mul * Num, which would overflow a 32-bit long for large counts.
 * Binary long multiplication with a running remainder is used instead.
 * Requires Den > 0, 0 <= Num <= Den, 0 <= Mul < 32768 and 3 * Den
 * representable in a long.
 */
static long MulDivFloor(long Mul, long Num, long Den) {
    long Quot = 0, Rem = 0, Bit;

    for (Bit = 1L << 14; Bit > 0; Bit >>= 1) {
        /* Invariant: Quot * Den + Rem == (bits of Mul consumed so far) * Num */
        Quot *= 2;
        Rem *= 2;
        if (Mul & Bit) Rem += Num;
        Quot += Rem / Den;
        Rem %= Den;
    }
    return Quot;
}

/*
 * One in-order pass over the subtree rooted at Node (may be NULL).
 *
 * Every word whose count equals Largest is printed together with its share
 * of all words, as a percentage rounded to two decimals. Counts below
 * Largest are folded into NewLargest so the caller knows which count to
 * print next.
 */
static void DisplayResultsRec(struct WordNode *Node) {
    if (!Node) return;
    DisplayResultsRec(Node->Prev);
    if (Node->Amount > NewLargest && Node->Amount < Largest) NewLargest = Node->Amount;
    else if (Node->Amount == Largest) {
        /* Hundredths of a percent: twice the value is computed first so
           that adding 1 and halving rounds to nearest. Words is non-zero
           whenever a node exists. */
        long P = (MulDivFloor(20000L, Largest, Words) + 1) / 2;
        fprintf(OutFile, "%1ld.%02ld%% %5ld %s\n", P / 100, P % 100, Largest, (char *) Node->Word);
        Amount++;
    }
    DisplayResultsRec(Node->Next);
}
/*
 * Write the word-frequency table.
 *
 * Name  path of the output file, or NULL to write to stdout.
 *
 * After a header with the total and unique word counts, the tree is
 * traversed once per distinct count, from the highest count downwards.
 * Each pass prints the words with the current count (alphabetically, as
 * the traversal is in-order) and determines the next lower count. Nothing
 * is written, and no file is created, when no word was counted.
 *
 * Returns TRUE on success, FALSE when the file cannot be opened or when
 * writing to it fails.
 */
static bool DisplayResults(char *Name) {
    bool Ok = TRUE;

    if (!FirstNode) return TRUE;
    if (Name) {
        if (!(OutFile = fopen(Name, "w"))) {
            fprintf(stderr, "%s: Couldn't open %s for writing!\n", ProgName, Name);
            return FALSE;
        }
        if (IOBuff) setvbuf(OutFile, IOBuff, _IOFBF, (size_t) 16384);
    } else {
        OutFile = stdout;
    }
    fprintf(OutFile, "\nWords: %6ld\nUnique: %5ld\n\n", Words, Unique);
    Largest = 1L<<30; /* A huge value */
    while (Largest > 0) {
        NewLargest = 0;
        Amount = 0;
        DisplayResultsRec(FirstNode);
        /* The first pass only finds the highest count; nothing to report. */
        if (Verbose && Largest < 1L<<30) {
            fprintf(stderr, "Printed %3ld... %ld found \015", Largest, Amount);
            fflush(stderr);
        }
        Largest = NewLargest;
    }
    if (Verbose) fprintf(stderr, "\n");
    fprintf(OutFile, "\n");
    /* Buffered data may only hit the disk on flush/close, so both the
       error flag and the close result decide whether the table is complete. */
    if (fflush(OutFile) != 0 || ferror(OutFile)) Ok = FALSE;
    if (Name && fclose(OutFile) != 0) Ok = FALSE;
    if (!Ok) {
        fprintf(stderr, "%s: Couldn't write %s!\n", ProgName, Name ? Name : "(stdout)");
    }
    return Ok;
}